knitr::opts_chunk$set(echo = TRUE)
library(corrplot)
library(factoextra)
library(NbClust)
library(cluster)
library(plotly)
library(irr)
library(anytime)
library(dplyr)
library(ggdendro)
library(tidyverse)
Read in the Data
# Heatmap of absolute pairwise correlations between all rating dimensions
# (column 1, `task`, is the task name and is excluded).
# addCoef.col expects a color; the original TRUE only worked via logical
# coercion to palette index 1 ("black"), so name the color explicitly.
corrplot(abs(cor(task_map[-1])), method = "shade",
         addCoef.col = "black",
         tl.col = "black", type = 'lower', diag = FALSE)
# Summary statistics over the full task-by-dimension rating matrix
# (task-name column dropped). range() confirms values lie in [0, 1].
task_map[-1] %>% as.matrix() %>% mean()
[1] 0.4662973
task_map[-1] %>% as.matrix() %>% median()
[1] 0.4087121
task_map[-1] %>% as.matrix() %>% sd()
[1] 0.3707203
task_map[-1] %>% as.matrix() %>% range()
[1] 0 1
# Raw (unnormalized) confidence scores: one row per task x question,
# with the Likert labels mapped to integers 1-5 and the "_confidence"
# suffix stripped from the question names.
df.confidence_scores_raw <- df.mapping.raw %>%
  select(task, contains("confidence")) %>%
  pivot_longer(-task, names_to = "question") %>%
  mutate(
    value = recode(
      value,
      "Very confident" = 5,
      "Confident" = 4,
      "Neutral" = 3,
      "Not confident" = 2,
      "Not at all confident" = 1
    ),
    question = gsub("_confidence", "", question)
  )
# Confidence scores z-scored within each user, to control for individual
# differences in how raters use the confidence scale.
df.confidence_scores_zscore <- df.mapping.raw %>%
  select(task, user, contains("confidence")) %>%
  pivot_longer(-c(task, user), names_to = "question") %>%
  mutate(
    value = recode(
      value,
      "Very confident" = 5,
      "Confident" = 4,
      "Neutral" = 3,
      "Not confident" = 2,
      "Not at all confident" = 1
    )
  ) %>%
  group_by(user) %>%
  mutate(value = scale(value)) %>%
  ungroup() %>%
  mutate(question = gsub("_confidence", "", question))
There is a very strong correlation between the confidence scores and the level of agreement – about 0.77. This relationship holds regardless of whether you z-score the confidence scores (which helps to account for individual-level variation in assigning confidence).
# Task-based confidence: mean z-scored confidence per task, joined to the
# per-task agreement summary and correlated with mean agreement.
# (na.rm = TRUE spelled out; T is a reassignable alias.)
zscored_confidence_by_task <- df.confidence_scores_zscore %>%
  group_by(task) %>%
  summarize(
    mean_confidence = mean(value, na.rm = TRUE)
  )
task_based_confidence <- inner_join(task_based_summary, zscored_confidence_by_task, by = "task")
cor.test(task_based_confidence$mean_agreement, task_based_confidence$mean_confidence)
Pearson's product-moment correlation
data: task_based_confidence$mean_agreement and task_based_confidence$mean_confidence
t = 10.281, df = 70, p-value = 1.246e-15
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6631110 0.8538653
sample estimates:
cor
0.7756263
# Question-based confidence: mean z-scored confidence per question,
# correlated with per-question rater agreement.
# (na.rm = TRUE spelled out; T is a reassignable alias.)
zscored_confidence_by_question <- df.confidence_scores_zscore %>%
  group_by(question) %>%
  summarize(
    mean_confidence = mean(value, na.rm = TRUE)
  )
question_based_confidence <- inner_join(question_based_summary, zscored_confidence_by_question, by = c("question_name" = "question"))
cor.test(question_based_confidence$mean_agreement, question_based_confidence$mean_confidence)
Pearson's product-moment correlation
data: question_based_confidence$mean_agreement and question_based_confidence$mean_confidence
t = 5.1162, df = 18, p-value = 7.224e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.4963006 0.9042603
sample estimates:
cor
0.7697634
A version of the above analysis using the original ordinal confidence variables (non-normalized).
# Task-based confidence using the raw (non-z-scored) ordinal scores.
# Note this overwrites `task_based_confidence` from the z-scored analysis.
# (na.rm = TRUE spelled out; T is a reassignable alias.)
confidence_by_task <- df.confidence_scores_raw %>%
  group_by(task) %>%
  summarize(
    mean_confidence = mean(value, na.rm = TRUE)
  )
task_based_confidence <- inner_join(task_based_summary, confidence_by_task, by = "task")
cor.test(task_based_confidence$mean_agreement, task_based_confidence$mean_confidence)
Pearson's product-moment correlation
data: task_based_confidence$mean_agreement and task_based_confidence$mean_confidence
t = 8.5847, df = 70, p-value = 1.531e-12
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5808435 0.8129402
sample estimates:
cor
0.7161453
# Question-based confidence using the raw (non-z-scored) ordinal scores.
# (na.rm = TRUE spelled out; T is a reassignable alias.)
confidence_by_question <- df.confidence_scores_raw %>%
  group_by(question) %>%
  summarize(
    mean_confidence = mean(value, na.rm = TRUE)
  )
question_based_confidence <- inner_join(question_based_summary, confidence_by_question, by = c("question_name" = "question"))
cor.test(question_based_confidence$mean_agreement, question_based_confidence$mean_confidence)
Pearson's product-moment correlation
data: question_based_confidence$mean_agreement and question_based_confidence$mean_confidence
t = 5.2762, df = 18, p-value = 5.127e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5140589 0.9085150
sample estimates:
cor
0.7793026
# Scatter of per-task rater agreement against mean rater confidence.
# NOTE(review): at this point `task_based_confidence` was last rebuilt
# from the RAW (non-z-scored) scores above, yet the title says
# "Normalized" — confirm which version of the join is intended here.
ggplot(task_based_confidence,
aes(x = mean_agreement,
y = mean_confidence)) +
geom_point() +
labs(title ="Per Task: Level of Rater Agreement v. Mean Normalized Rater Confidence")
Hierarchical Clustering
#' Compute label angle/justification parameters for a dendrogram plot.
#'
#' @param nbLabels Number of leaf labels.
#' @param direction Layout direction: "tb", "bt", "lr", or "rl".
#' @param fan Is the dendrogram drawn as a circular fan (polar coords)?
#' @return A list with `angle`, `hjust`, `vjust` suitable for geom_text().
set_labels_params <- function(nbLabels,
                              direction = c("tb", "bt", "lr", "rl"),
                              fan = FALSE) {
  # Resolve the choice vector to a single value. Without this, calling
  # with the default `direction` made the `if (direction %in% ...)`
  # conditions below operate on a length-4 logical (an error in R >= 4.2).
  direction <- match.arg(direction)
  if (fan) {
    # Spread labels around the circle; labels falling on the left half
    # (90-270 degrees) are rotated 180 and right-justified so they read
    # outward instead of upside down.
    angle <- 360 / nbLabels * 1:nbLabels + 90
    idx <- angle >= 90 & angle <= 270
    angle[idx] <- angle[idx] + 180
    hjust <- rep(0, nbLabels)
    hjust[idx] <- 1
  } else {
    angle <- rep(0, nbLabels)
    hjust <- 0
    # Vertical layouts tilt labels 45 degrees to reduce overlap.
    if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
    # Layouts where labels sit to the left of the leaves right-justify them.
    if (direction %in% c("tb", "rl")) { hjust <- 1 }
  }
  list(angle = angle, hjust = hjust, vjust = 0.5)
}
#' Augment ggdendro dendrogram data with cluster assignments for a k-cut.
#'
#' @param hc An hclust object.
#' @param k Number of clusters to cut the tree into.
#' @return The ggdendro data list with a `clust` column added to segments
#'   and labels, plus a `line` flag on segments above the cut.
dendro_data_k <- function(hc, k) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
# Cluster id of each leaf, reordered into dendrogram (left-to-right) order.
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
# Cut height: midway between the k-th and (k-1)-th tallest merge heights.
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
# A segment belongs to cluster i when both endpoints fall within the
# x-span of cluster i's leaves and it lies below the cut height.
for (i in 1:k) {
xi <- hcdata$labels$x[labclust == i]
idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
idx3 <- seg$yend < height
idx <- idx1 & idx2 & idx3
segclust[idx] <- i
}
# Segments left unassigned (above the cut) inherit the following
# segment's cluster for coloring; `line` flags them so they can be drawn
# with a distinct linetype.
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata
}
#' Plot a dendrogram prepared by dendro_data_k(), colored by cluster.
#'
#' @param hcdata Output of dendro_data_k().
#' @param direction Layout: "lr", "rl", "tb", or "bt".
#' @param fan Draw as a circular fan (polar coordinates) instead of linear.
#' @param scale.color Optional vector of cluster colors.
#' @param branch.size Line width of the branches.
#' @param label.size Text size of the leaf labels.
#' @param nudge.label Label offset, as a fraction of the tree height.
#' @param expand.y Extra axis space past the leaves so labels are not clipped.
#' @return A ggplot object.
plot_ggdendro <- function(hcdata,
direction = c("lr", "rl", "tb", "bt"),
fan = FALSE,
scale.color = NULL,
branch.size = 1,
label.size = 3,
nudge.label = 0.01,
expand.y = 0.1) {
direction <- match.arg(direction) # if fan = FALSE
ybreaks <- pretty(segment(hcdata)$y, n = 5)
ymax <- max(segment(hcdata)$y)
## branches: above-cut segments get a distinct linetype via `line`,
## below-cut segments are colored by their cluster id.
p <- ggplot() +
geom_segment(data = segment(hcdata),
aes(x = x,
y = y,
xend = xend,
yend = yend,
linetype = factor(line),
colour = factor(clust)),
lineend = "round",
show.legend = FALSE,
size = branch.size)
## orientation: polar fan, or flipped/reversed axes per `direction`
if (fan) {
p <- p +
coord_polar(direction = -1) +
scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_x_continuous(breaks = NULL)
if (direction %in% c("rl", "lr")) {
p <- p + coord_flip()
}
if (direction %in% c("bt", "lr")) {
p <- p + scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_y_continuous(breaks = ybreaks)
# Non-reversed axis: nudge labels in the opposite direction.
nudge.label <- -(nudge.label)
}
}
# labels: angle/justification computed per layout by set_labels_params()
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p +
geom_text(data = label(hcdata),
aes(x = x,
y = y,
label = label,
colour = factor(clust),
angle = angle),
vjust = labelParams$vjust,
hjust = labelParams$hjust,
nudge_y = ymax * nudge.label,
size = label.size,
show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)
}
# Expand past y = 0 so leaf labels are not clipped at the plot edge.
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p
}
set.seed(1)
# Task-by-dimension feature matrix with task names as rownames,
# computed once and reused for the distance matrix and for NbClust
# (the original recomputed column_to_rownames() twice).
task_features <- task_map %>% column_to_rownames("task")
# Euclidean dissimilarity matrix between tasks
d <- dist(task_features, method = "euclidean")
# Hierarchical clustering using Ward's method
# (original comment said "Complete Linkage", but ward.D2 is what is fit)
hc5 <- hclust(d, method = "ward.D2")
# Vote across NbClust's many indices for the optimal number of clusters
NbClust(data = task_features, distance = "euclidean", min.nc = 2, max.nc = 15, method = "ward.D2")
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
*******************************************************************
* Among all indices:
* 8 proposed 2 as the best number of clusters
* 6 proposed 3 as the best number of clusters
* 2 proposed 4 as the best number of clusters
* 3 proposed 11 as the best number of clusters
* 2 proposed 12 as the best number of clusters
* 1 proposed 14 as the best number of clusters
* 1 proposed 15 as the best number of clusters
***** Conclusion *****
* According to the majority rule, the best number of clusters is 2
*******************************************************************
$All.index
KL CH Hartigan CCC Scott Marriot TrCovW TraceW Friedman
2 2.7549 32.9128 14.2633 10.7304 600.0942 0.8292 24.2363 64.2186 2253.633
3 1.3224 26.5563 11.6289 7.8818 781.6212 0.1499 16.8037 53.3483 2388.235
4 1.4550 24.2083 8.6944 8.5778 882.3594 0.0658 10.9963 45.6540 2487.477
5 1.4615 22.3181 6.4038 9.3179 973.6377 0.0289 7.5798 40.4785 2601.855
6 0.9693 20.5307 6.7729 9.3506 1132.1170 0.0046 6.6994 36.9471 2895.586
7 1.4195 19.6905 5.1020 9.4864 1260.0349 0.0011 5.2321 33.5085 3364.848
8 0.9683 18.6399 5.3917 9.5818 1336.3086 0.0005 4.3205 31.0698 3490.919
9 1.1268 18.0711 4.9970 9.7785 1453.0660 0.0001 3.5107 28.6557 3625.514
10 1.0383 17.6085 4.9991 9.9450 1558.0244 0.0000 2.9226 26.5498 3826.166
11 0.9899 17.3411 5.2635 10.1924 1648.6200 0.0000 2.4316 24.5688 4220.981
12 1.6083 17.3148 3.5028 10.4000 1810.7186 0.0000 2.0846 22.6172 4863.753
13 1.0955 16.8056 3.2831 10.5764 1894.8189 0.0000 1.9193 21.3697 4893.967
14 0.9785 16.3468 3.4160 10.7074 2002.3501 0.0000 1.7617 20.2432 5177.165
15 0.9852 16.0358 3.5513 10.9101 2047.9786 0.0000 1.4892 19.1173 5296.495
Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky Ball
2 9.1488 0.4244 0.9297 0.4546 0.8080 14.7353 3.7648 0.3132 32.1093
3 11.0129 0.4504 1.3742 0.2494 0.7948 12.6534 4.0734 0.3220 17.7828
4 12.8690 0.4153 1.5590 0.2384 0.7349 12.9832 5.6480 0.3155 11.4135
5 14.5144 0.3789 1.5275 0.2004 0.5360 5.1941 11.9436 0.3032 8.0957
6 15.9017 0.3774 1.4894 0.2053 0.6652 5.5370 7.4270 0.2875 6.1579
7 17.5335 0.3570 1.3486 0.2245 0.7583 7.9684 4.9330 0.2774 4.7869
8 18.9098 0.3502 1.4456 0.1830 0.7073 4.5524 6.1063 0.2675 3.8837
9 20.5028 0.3213 1.4571 0.1937 0.5057 8.7956 14.1573 0.2579 3.1840
10 22.1291 0.3080 1.3709 0.2130 0.6376 3.9779 8.0035 0.2496 2.6550
11 23.9133 0.2979 1.3234 0.2171 0.7102 0.4081 3.2844 0.2458 2.2335
12 25.9767 0.3800 1.1764 0.2358 0.5116 2.8636 11.5233 0.2394 1.8848
13 27.4933 0.3927 1.1177 0.2416 0.6389 6.2171 8.3392 0.2337 1.6438
14 29.0231 0.3920 1.1179 0.2275 0.6594 3.0990 7.1259 0.2277 1.4459
15 30.7325 0.4353 1.1023 0.2296 0.7198 4.6705 5.7829 0.2218 1.2745
Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
2 0.7595 4.5120 0.1327 0.4941 0.0432 3.8032 0.9038 0.8035
3 0.6162 1.0674 0.5206 0.3142 0.0403 4.4006 0.8246 0.7051
4 0.5812 1.4882 1.1004 0.2872 0.0400 4.6379 0.7649 0.6663
5 0.4932 -0.0187 1.9328 0.2361 0.0418 4.5730 0.7144 0.5898
6 0.5027 0.1139 1.9576 0.2415 0.0440 4.5232 0.6859 0.5684
7 0.5115 5.3953 2.0553 0.2415 0.0438 4.3465 0.6555 0.5208
8 0.4062 0.1203 3.4674 0.2342 0.0455 5.2113 0.6260 0.4872
9 0.4094 0.1380 3.7789 0.2342 0.0467 5.1822 0.6025 0.4795
10 0.4093 0.0912 3.9426 0.2342 0.0472 4.8603 0.5775 0.4170
11 0.4117 -0.0226 4.0927 0.2388 0.0472 4.7870 0.5591 0.3972
12 0.4141 0.1071 4.0898 0.2946 0.0473 4.3009 0.5366 0.3212
13 0.4145 16.6000 4.1449 0.3071 0.0473 4.2262 0.5230 0.3108
14 0.3782 0.1407 5.0287 0.3071 0.0482 4.2857 0.5041 0.2924
15 0.3762 0.1980 5.2227 0.3429 0.0490 4.5590 0.4922 0.2932
$All.CriticalValues
CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
2 0.8565 10.3906 0.000
3 0.8425 9.1578 0.000
4 0.8220 7.7974 0.000
5 0.6446 3.3077 0.000
6 0.7153 4.3791 0.000
7 0.7939 6.4882 0.000
8 0.7153 4.3791 0.000
9 0.6929 3.9896 0.000
10 0.6634 3.5522 0.000
11 0.4372 1.2873 0.003
12 0.5578 2.3781 0.000
13 0.7153 4.3791 0.000
14 0.6446 3.3077 0.000
15 0.7246 4.5606 0.000
$Best.nc
KL CH Hartigan CCC Scott Marriot TrCovW TraceW
Number_clusters 2.0000 2.0000 4.0000 15.0000 3.0000 3.0000 3.0000 3.000
Value_Index 2.7549 32.9128 2.9346 10.9101 181.5271 0.5951 7.4326 3.176
Friedman Rubin Cindex DB Silhouette Duda PseudoT2
Number_clusters 12.0000 12.0000 11.0000 2.0000 2.0000 11.0000 11.0000
Value_Index 642.7719 -0.5469 0.2979 0.9297 0.4546 0.7102 0.4081
Beale Ratkowsky Ball PtBiserial Frey McClain Dunn
Number_clusters NA 3.000 3.0000 2.0000 4.0000 2.0000 2.0000
Value_Index NA 0.322 14.3265 0.7595 1.4882 0.1327 0.4941
Hubert SDindex Dindex SDbw
Number_clusters 0 2.0000 0 14.0000
Value_Index 0 3.8032 0 0.2924
$Best.partition
Categorization problem
1
Mastermind
1
Logic Problem
1
Sudoku
1
Rank cities by population, rank words by familiarity
1
Shopping plan
1
Carter Racing (Experimenterless Version)
1
Iterative lemonade stand task
1
Reading the mind in the eyes
1
Writing story
2
Moral Reasoning (Disciplinary Action Case)
2
Word construction from a subset of letters
1
Carter Racing
1
Guessing the correlation
1
Wolf, goat and cabbage transfer
1
Room assignment task
1
Arithmetic problem 1
1
Space Fortress
1
Visual Oddball Target
1
The N light bulbs game
1
Word completion given starting letter
1
Railroad Route Construction game
1
Allocating resources to programs
2
Game of Clue - Terrorist Attack
1
Word completion given part of word
1
NASA Moon survival
1
Image rating
1
Estimating Factual Quantities
1
Run a mini business
1
Recall videos
1
Search for Oil Task
1
To evacuate or not to evacuate
1
Estimating geological metrics
1
Euclidean traveling salesperson
1
Reproducing arts
1
Estimating social quantity
1
Hidden figures in a picture (Searching Task)
1
Estimating pages of a book
1
Abstract grid task
1
Unscramble words (anagrams)
1
Random dot motion
1
Target Search
1
Find the maximum
1
Wildcam Gorongosa (Zooniverse)
1
Recall stories
1
Recall association
1
Letters-to-numbers problems (cryptography)
1
Architectural design task
1
Recall word lists
1
Wason's Selection Task
1
Summarize Discussion
2
Divergent Association Task
2
Crisis mapping
1
9 Dot Problem
1
The Fish game
1
Advertisement writing
2
Hidden figures in a picture (Recall Task)
1
Computer maze
1
Splitting a deck of cards
1
Object based generalization for reasoning (Phyre)
1
Ravens Matrices
1
Trivia Multiple Choice Quiz
1
Railroad Route Construction game (Impossible Version)
2
Desert survival
1
Putting food into categories
2
Wildcat Wells
1
Graph coloring task
1
Husbands and wives transfer
1
Checkers
1
Typing game
1
Recall images
1
Whac-A-Mole
1
# Plot the obtained dendrogram, colored by the k = 2 cluster cut
# (`<-` instead of `=` for top-level assignment, per R convention).
colors <- c("#118AB2", "#A53860", "#073B4C", "#9071EE", "#209A92", "#3E885B", "#CC9328")
hcdata <- dendro_data_k(hc5, 2)
p <- plot_ggdendro(hcdata,
                   direction = "lr",
                   scale.color = colors,
                   label.size = 10,
                   branch.size = 2,
                   expand.y = 4) + theme_void()
p
# McGrath taxonomy dimensions, pulled from the full (continuous) task map.
df.mcg <- task_map %>%
  select(all_of(c(
    "task",
    "Q1concept_behav",
    "Q20type_3_type_4",
    "Q3type_1_planning",
    "Q4type_2_generate",
    "Q6type_5_cc",
    "Q7type_7_battle",
    "Q8type_8_performance"
  )))
# Heatmap of mean rater responses on each McGrath dimension, per task.
# Build the long-format plotting frame first, then plot it.
mcgrath_long <- df.mcg %>%
  rename(
    Physical = Q1concept_behav,
    Intellective = Q20type_3_type_4,
    Planning = Q3type_1_planning,
    Generative = Q4type_2_generate,
    `Cognitive Conflict` = Q6type_5_cc,
    Battle = Q7type_7_battle,
    Performance = Q8type_8_performance
  ) %>%
  pivot_longer(cols = -task) %>%
  rename(`Mean Rater Response` = value)
ggplot(mcgrath_long, aes(x = name, y = task)) +
  geom_tile(aes(fill = `Mean Rater Response`)) +
  scale_fill_gradient(low = "#CC3363", high = "#07BEB8") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(x = "Dimension in McGrath's Taxonomy",
       y = "Task Name")
ggsave("26task-mcgrath-ratings.png")
Saving 7 x 9 in image
McGrath - within v. between-category variance
Physical = Q1concept_behav,
Intellective = Q20type_3_type_4,
Planning = Q3type_1_planning,
Generative = Q4type_2_generate,
`Cognitive Conflict` = Q6type_5_cc,
Battle = Q7type_7_battle,
Performance = Q8type_8_performance
# Discretize every rating dimension into 10 bins, keeping the task name
# as a `task` column (cbind names the first column `task_map$task`,
# hence the backticked rename).
# NOTE(review): `discretize()` appears to come from the infotheo package
# (see the documentation link below), which is not loaded in the visible
# library() block — confirm it is attached elsewhere.
task_map_discrete <- cbind(task_map$task, discretize(task_map[-1], nbins = 10)) %>%
rename(task = `task_map$task`)
# McGrath taxonomy dimensions (discretized version); note this
# overwrites the continuous df.mcg defined above.
df.mcg <- task_map_discrete %>%
  select(all_of(c(
    "task",
    "Q1concept_behav",
    "Q20type_3_type_4",
    "Q3type_1_planning",
    "Q4type_2_generate",
    "Q6type_5_cc",
    "Q7type_7_battle",
    "Q8type_8_performance"
  )))
# Laughlin taxonomy dimensions (discretized).
df.laughlin <- task_map_discrete %>%
  select(all_of(c(
    "task",
    "Q15dec_verifiability",
    "Q16shared_knowledge",
    "Q17within_sys_sol",
    "Q18ans_recog",
    "Q19time_solvability",
    "Q21intellective_judg_1",
    "Q24eureka_question"
  )))
# Shaw taxonomy dimensions (discretized).
df.shaw <- task_map_discrete %>%
  select(all_of(c(
    "task",
    "Q2intel_manip_1",
    "Q13outcome_multip",
    "Q14sol_scheme_mul"
  )))
# Steiner taxonomy dimensions (discretized).
df.steiner <- task_map_discrete %>%
  select(all_of(c(
    "task",
    "Q9divisible_unitary",
    "Q10maximizing",
    "Q11optimizing"
  )))
# Zigurs taxonomy dimensions (discretized); Q13/Q14 overlap with Shaw.
df.zigurs <- task_map_discrete %>%
  select(all_of(c(
    "task",
    "Q13outcome_multip",
    "Q14sol_scheme_mul",
    "Q22confl_tradeoffs",
    "Q23ss_out_uncert"
  )))
For documentation on the infotheo package (discretize, multiinformation), see: https://cran.r-project.org/web/packages/infotheo/infotheo.pdf
Confirming discretization still looks good (qualitatively)
# PCA on the discretized ratings, task-name column dropped.
# (center = TRUE spelled out; T is a reassignable alias.)
pca <- task_map_discrete %>% #select(-continuous_questions) %>%
  select(-task) %>%
  prcomp(center = TRUE)
# k-means on the PCA coordinates.
# NOTE(review): the inline comment (and NbClust's majority rule above)
# say 2 is the optimal number of clusters, but centers = 3 is used —
# confirm which k is intended.
kmeans_output <- pca$x %>% # 2 is the optimal number
kmeans(centers = 3, nstart = 100)
# Attach principal-component coordinates and the k-means cluster
# assignment (as a factor) to the original task map.
combined_data <- task_map %>%
  cbind(pca$x) %>%
  mutate(cluster = factor(kmeans_output$cluster))
# Interactive 3D scatter of tasks in the first three principal
# components, colored by k-means cluster; hover text shows the task name.
plot_ly(
x = combined_data$PC1,
y = combined_data$PC2,
z = combined_data$PC3,
type = "scatter3d",
mode = "markers", # can use mode = "text"
text = combined_data$task ,
color = combined_data$cluster
)
# Total correlation (also known as multi-information): overall redundancy
# among the rating dimensions, for the full discretized set and for each
# taxonomy's subset (task-name column dropped in every call).
multiinformation(task_map_discrete[-1])
[1] 44.50342
multiinformation(df.mcg[-1])
[1] 9.274719
multiinformation(df.laughlin[-1])
[1] 10.39968
multiinformation(df.shaw[-1])
[1] 2.64197
multiinformation(df.steiner[-1])
[1] 2.659127
multiinformation(df.zigurs[-1])
[1] 4.867023
# maybe don't run? takes forever, likely due to calculation of many conditional probabilities. also, negative and not interpretable
# interaction information
# interinformation(task_map_discrete[-1])
# interinformation(df.mcg[-1])
# interinformation(df.laughlin[-1])
# interinformation(df.shaw[-1])
# interinformation(df.steiner[-1])
# interinformation(df.zigurs[-1])
Notes on how this is supposed to work:
from https://arxiv.org/pdf/1701.08868.pdf > In the case of three random variables, interaction information is the gain (or loss) in information transmitted between any two of the variables, due to additional knowledge of the third random variable. That is, interaction information is the difference between the conditional and unconditional mutual information between two of the variables, where the conditioning is on the third variable. It is important to note that unlike (conditional) mutual information which is always non-negative, interaction information can be negative.